{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from scipy.stats import norm\n",
    "import matplotlib.pylab as plt\n",
    "import pandas as pd\n",
    "from bokeh.layouts import row, widgetbox, layout, gridplot\n",
    "from bokeh.models import CustomJS, Slider\n",
    "from bokeh.plotting import figure, output_file, show, ColumnDataSource\n",
    "from bokeh.models.glyphs import MultiLine\n",
    "from bokeh.io import output_notebook\n",
    "from bokeh.models.widgets import Div\n",
    "%matplotlib inline\n",
    "output_notebook()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create Fake Data\n",
    "Let's start by generating a wee fake dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "num_data = 5\n",
    "X = norm.rvs(size=(num_data,2), random_state=4)*2\n",
    "X = np.dot(X, np.linalg.cholesky([[1, 0.8], [0.8, 0.8]]))\n",
    "m = X.mean(axis=0)\n",
    "X = X - m"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's take a look at the data we generated."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "plt.scatter(X[:, 0],X[:, 1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## An Interactive Plot\n",
    "Now we'll create an interactive plot with these data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# bokeh plot\n",
    "\n",
    "# pre-populate data source object\n",
    "z0 = np.nan*np.zeros(len(X))\n",
    "z1 = np.nan*np.zeros(len(X))\n",
    "x0s = [np.array(np.nan*np.zeros(2)) for i in range(0,len(X))]\n",
    "x1s = [np.array(np.nan*np.zeros(2)) for i in range(0,len(X))]\n",
    "d = np.nan*np.zeros(len(X))\n",
    "dzs = np.zeros(len(X))\n",
    "\n",
    "# data source\n",
    "source_data = ColumnDataSource(data=dict(x0=X[:,0], x1=X[:,1], z0=z0, z1=z1, x0s=x0s, x1s=x1s, d=d, dzs=dzs))\n",
    "\n",
    "# main plot\n",
    "plot = figure(x_range=(-3, 2.5), y_range=(-2.5, 2.5), plot_width=600, plot_height=600, match_aspect=True)\n",
    "\n",
    "# plot the new components in the mini-plot on the right\n",
    "var_plot = figure(x_range=(-3, 3), y_range=(-2.5, 2.5), plot_width = 300, plot_height = 300)\n",
    "var_plot.scatter('d', 'dzs', source=source_data, color='firebrick', alpha=0.6)\n",
    "\n",
    "# plot the data\n",
    "plot.scatter('x0', 'x1', source=source_data, color='navy', alpha=0.6, size=5)\n",
    "\n",
    "# plot the new axis\n",
    "plot.line('z0', 'z1', source=source_data, color='black', alpha=0.5)\n",
    "\n",
    "# plot the projections of the data onto the rotating line\n",
    "plot.scatter('z0', 'z1', source=source_data, color='firebrick', alpha=0.6)\n",
    "\n",
    "# plot all the lines connecting the data dots to the projection dots\n",
    "glyph = MultiLine(xs = \"x0s\", ys = \"x1s\", line_color=\"#8073ac\", line_width=1)\n",
    "plot.add_glyph(source_data, glyph)\n",
    "\n",
    "# the javascript code that calculates all the projection dots depending upon the user-chosen angle for the line to project onto\n",
    "callback = CustomJS(args=dict(source=source_data), code=\"\"\"\n",
    "    var data = source.data;\n",
    "    var x0 = data['x0']\n",
    "    var x1 = data['x1']\n",
    "    \n",
    "    var d = data['d']\n",
    "    \n",
    "    var z0 = data['z0']\n",
    "    var z1 = data['z1']\n",
    "    \n",
    "    var x0s = data['x0s']\n",
    "    var x1s = data['x1s']\n",
    "    \n",
    "    var A = ang.value*Math.PI/180;\n",
    "\n",
    "    var w = [Math.cos(A), Math.sin(A)];\n",
    "\n",
    "    for (var i = 0; i < x0.length; i++) {\n",
    "        d[i] = x0[i]*w[0] + x1[i]*w[1];\n",
    "        z0[i] = w[0]*d[i];\n",
    "        z1[i] = w[1]*d[i];\n",
    "        x0s[i] = [x0[i], z0[i]];\n",
    "        x1s[i] = [x1[i], z1[i]];\n",
    "    }\n",
    "\n",
    "    data['z0'] = z0\n",
    "    data['z1'] = z1\n",
    "    data['x0s'] = x0s\n",
    "    data['x1s'] = x1s\n",
    "    data['d'] = d\n",
    "    \n",
    "    source.change.emit();\n",
    "\"\"\")\n",
    "\n",
    "# make the slider bar\n",
    "ang_slider = Slider(start=0, end=180, value=1, step=.05, title=\"Angle\", callback=callback)\n",
    "callback.args[\"ang\"] = ang_slider\n",
    "\n",
    "# add output text\n",
    "div = Div(text=\"Instructions: There's a lot of code in the cell above! What just happened? The blue dots are our original data points. We are choosing the direction of the first PC for this dataset; this direction is represented by the black line. The red dots are the projections of the data onto the new basis direction. Using the slider bar, change the angle of the new basis direction, and observe how the projections and the variance of the projections, represented by the spread of the points in the plot on the right, change.\", width=900, height=100)\n",
    "\n",
    "# empty space div\n",
    "space_div_1 = Div(width=200, height=80)\n",
    "space_div_2 = Div(width=200, height=95)\n",
    "\n",
    "# plot layout\n",
    "lo = layout([\n",
    "  [[widgetbox(div), plot], [widgetbox(space_div_1), widgetbox(ang_slider), widgetbox(space_div_2), var_plot]],\n",
    "])\n",
    "\n",
    "# display\n",
    "show(lo)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Quiz\n",
    "Once you've played around with the interactive plot above, submit the following quiz. Based on what you've learned in the lessons about what PCA does, eyeball the plot above and choose the approximate direction of the first principal component for the above dataset. Submit your answer as an angle in degrees. Here, we are just looking for an approximate value, so we are not looking for a calculation of the correct angle, just an approximation. It will be scored correct if it's right to within ±10°."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import quiz_tests\n",
    "\n",
    "\n",
    "def direction_of_first_PC():\n",
    "    \"\"\"Returns the approximate direction of the first PC as an angle in degrees.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    close : np.float64\n",
    "        The approximate angle measure of the direction of the first PC in degrees.\n",
    "    \"\"\"\n",
    "    \n",
    "    # TODO: Implement Function\n",
    "    pass\n",
    "\n",
    "\n",
    "quiz_tests.test_direction_of_first_PC(direction_of_first_PC)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
